setwd("le répertoire de mes données")
#knitr::opts_knit$set(root.dir = "~/Documents/GitHub/m1act_Spring21")
datadir <- "~/Documents/GitHub/m1act_Spring21/data"
exportdir <- "~/Documents/GitHub/m1act_Spring21/export"
graphdir <- "~/Documents/GitHub/m1act_Spring21/graph"
library(faraway)
## Warning: package 'faraway' was built under R version 4.0.2
aaa=rnorm(100)
bbb=rpois(100,1)
save(aaa,bbb,file=paste(exportdir,"simulations.RData",sep="/"))
rm(aaa,bbb)
try(aaa)
## Error in try(aaa) : objet 'aaa' introuvable
try(bbb)
## Error in try(bbb) : objet 'bbb' introuvable
load(paste(exportdir,"simulations.RData",sep="/"))
aaa
## [1] 0.53918008 3.35624731 0.32530810 0.30869853 0.45812917 -0.18028935
## [7] 0.78764959 -0.45366040 -0.70406372 0.09766136 -0.76862059 0.40349567
## [13] 0.71100150 -0.22341413 0.01130540 -2.31648331 1.16312136 0.66699364
## [19] -0.15041204 -0.36647171 0.09449877 0.36446214 -0.73098304 1.53584982
## [25] -0.14314949 -0.37721125 -1.45730393 -0.40967923 -1.35274330 -0.36881981
## [31] 0.16734731 -1.27853680 -1.77817152 1.01272806 -1.33617097 1.20966020
## [37] 0.26506061 0.31971658 -2.25323741 1.82210220 1.70244205 1.23380975
## [43] 0.78877631 0.69810510 0.73055558 0.82137775 0.38815851 -0.66786133
## [49] 0.53397497 -0.26166113 0.60453192 1.29054167 0.54994984 -1.51491687
## [55] 0.61199878 -0.24943743 -0.62982501 0.55483869 0.97976751 1.47480814
## [61] 1.85454583 0.67726936 0.57139608 0.09040689 -1.36899934 1.36310270
## [67] -0.60291506 -0.84186371 -0.26349584 0.30337803 -0.86720225 0.23917734
## [73] -1.84514221 0.31440717 0.01304982 -0.07712896 0.03915341 1.20890182
## [79] 1.27923318 1.70402725 0.18105603 0.77303566 0.85683752 -0.23032459
## [85] 1.15371667 -0.53067714 -1.76710707 0.26799246 1.94570215 0.81701053
## [91] 0.85796336 1.38389143 -0.45382472 -0.36096324 1.19460176 -1.03973040
## [97] 0.78957432 -0.88123257 -0.24367202 -1.04461117
bbb
## [1] 1 1 1 2 1 1 4 0 0 1 1 1 1 1 1 4 2 1 0 1 1 1 1 1 2 0 5 0 1 0 0 0 1 0 0 0 2
## [38] 0 1 2 1 2 1 2 2 2 0 1 1 0 1 0 3 0 2 2 1 0 1 0 3 1 1 0 2 3 0 3 3 0 1 0 1 1
## [75] 0 0 0 0 1 1 1 2 1 1 2 0 2 1 0 2 2 2 1 1 1 4 1 1 0 2
load(paste(datadir,"vulnerability.RData",sep="/"))
#attach(vul)
#country_name
#detach(vul)
with(vul, country_name)
## [1] Albania Algeria
## [3] Angola Argentina
## [5] Armenia Australia
## [7] Austria Azerbaijan
## [9] Bahamas Bangladesh
## [11] Belarus Belgium
## [13] Belize Benin
## [15] Bhutan Bolivia
## [17] Bosnia-Hercegovenia Botswana
## [19] Brazil Bulgaria
## [21] Burkina Faso Burundi
## [23] Cambodia Cameroon
## [25] Canada Central African Rep
## [27] Chad Chile
## [29] China P Rep Colombia
## [31] Congo Costa Rica
## [33] Cote d'Ivoire Croatia
## [35] Cuba Czech Rep
## [37] Dominican Rep Ecuador
## [39] Egypt El Salvador
## [41] Eritrea Ethiopia
## [43] Fiji France
## [45] Gambia The Georgia
## [47] Germany Ghana
## [49] Greece Grenada
## [51] Guatemala Guinea
## [53] Guinea Bissau Guyana
## [55] Haiti Honduras
## [57] Hong Kong (China) Hungary
## [59] India Indonesia
## [61] Iran Islam Rep Ireland
## [63] Israel Italy
## [65] Jamaica Japan
## [67] Jordan Kazakhstan
## [69] Kenya Korea Rep
## [71] Kyrgyzstan Lao P Dem Rep
## [73] Lebanon Lesotho
## [75] Lithuania Macedonia FRY
## [77] Madagascar Malawi
## [79] Malaysia Mali
## [81] Mauritania Mauritius
## [83] Mexico Moldova Rep
## [85] Mongolia Morocco
## [87] Mozambique Myanmar
## [89] Namibia Nepal
## [91] Netherlands New Zealand
## [93] Nicaragua Niger
## [95] Nigeria Norway
## [97] Oman Pakistan
## [99] Panama Papua New Guinea
## [101] Paraguay Peru
## [103] Philippines Poland
## [105] Portugal Romania
## [107] Russia Rwanda
## [109] Samoa Saudi Arabia
## [111] Senegal Sierra Leone
## [113] Slovakia South Africa
## [115] Spain Sri Lanka
## [117] St Lucia St Vincent and The Grenadines
## [119] Sudan Suriname
## [121] Swaziland Switzerland
## [123] Syrian Arab Rep Tajikistan
## [125] Tanzania Uni Rep Thailand
## [127] Timor-Leste Togo
## [129] Tonga Trinidad and Tobago
## [131] Tunisia Turkey
## [133] Uganda Ukraine
## [135] United Kingdom United States
## [137] Uruguay Vanuatu
## [139] Venezuela Viet Nam
## [141] Yemen Zaire/Congo Dem Rep
## [143] Zambia Zimbabwe
## 144 Levels: Albania Algeria Angola Argentina Armenia Australia ... Zimbabwe
summary(vul)
## country_name ln_urb ln_events ln_fert
## Albania : 1 Min. :2.182 Min. :0.6931 Min. :0.3716
## Algeria : 1 1st Qu.:3.421 1st Qu.:2.0794 1st Qu.:0.9507
## Angola : 1 Median :3.911 Median :2.8332 Median :1.4585
## Argentina: 1 Mean :3.773 Mean :2.7752 Mean :1.3283
## Armenia : 1 3rd Qu.:4.190 3rd Qu.:3.3758 3rd Qu.:1.7047
## Australia: 1 Max. :4.588 Max. :5.9480 Max. :2.0477
## (Other) :138
## hdi ln_pop ln_death_risk death_risk
## Min. :0.3350 Min. : 0.5878 Min. :-4.3920 Min. : 0.01238
## 1st Qu.:0.5321 1st Qu.: 4.2487 1st Qu.:-1.3588 1st Qu.: 0.25697
## Median :0.7360 Median : 5.2097 Median :-0.3045 Median : 0.73753
## Mean :0.6887 Mean : 5.1372 Mean :-0.1689 Mean : 5.62025
## 3rd Qu.:0.8034 3rd Qu.: 6.2394 3rd Qu.: 0.7831 3rd Qu.: 2.18859
## Max. :0.9530 Max. :10.0206 Max. : 5.1184 Max. :167.07002
##
with(vul,pairs(~ln_death_risk+ln_events+ln_fert+hdi+ln_pop, main="Simple Scatterplot Matrix"))
if(!("GGally" %in% rownames(installed.packages()))){install.packages("GGally")}
library(GGally)
## Warning: package 'ggplot2' was built under R version 4.0.2
with(vul, ggpairs(vul[,c("ln_death_risk","ln_events","ln_fert","hdi","ln_pop")]))
fit_univ = lm(ln_death_risk~ln_events,data=vul)
newdata=data.frame(ln_events=3.4)
pred=predict(fit_univ,newdata,interval="predict")
ic=predict(fit_univ,interval="confidence")
print(pred)
## fit lwr upr
## 1 0.1543123 -3.185642 3.494266
print(ic[1:5,])
## fit lwr upr
## 1 -0.41346753 -0.72116718 -0.1057679
## 2 0.20424358 -0.14006908 0.5485563
## 3 -0.02960412 -0.31691647 0.2577082
## 4 0.27723443 -0.09224715 0.6467160
## 5 -0.88753758 -1.36903423 -0.4060409
if(!("HH" %in% rownames(installed.packages()))){install.packages("HH")}
library('HH')
## Warning: package 'HH' was built under R version 4.0.2
## Warning: package 'multcomp' was built under R version 4.0.2
## Warning: package 'survival' was built under R version 4.0.2
## Warning: package 'TH.data' was built under R version 4.0.2
## Warning: package 'MASS' was built under R version 4.0.2
ci.plot(fit_univ)
fit = lm(ln_death_risk~ln_events+ln_fert+hdi+ln_pop, data=vul)
summary(fit)
##
## Call:
## lm(formula = ln_death_risk ~ ln_events + ln_fert + hdi + ln_pop,
## data = vul)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4518 -0.7673 -0.1513 0.5669 6.2271
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.3485 1.5175 -3.524 0.000575 ***
## ln_events 1.3708 0.1792 7.649 3.04e-12 ***
## ln_fert 2.1961 0.4614 4.760 4.81e-06 ***
## hdi 1.9922 1.2628 1.578 0.116928
## ln_pop -0.5672 0.1026 -5.529 1.54e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.35 on 139 degrees of freedom
## Multiple R-squared: 0.4221, Adjusted R-squared: 0.4055
## F-statistic: 25.38 on 4 and 139 DF, p-value: 8.522e-16
anova(fit)
## Analysis of Variance Table
##
## Response: ln_death_risk
## Df Sum Sq Mean Sq F value Pr(>F)
## ln_events 1 36.775 36.775 20.186 1.466e-05 ***
## ln_fert 1 71.336 71.336 39.156 4.542e-09 ***
## hdi 1 21.154 21.154 11.611 0.0008576 ***
## ln_pop 1 55.703 55.703 30.575 1.540e-07 ***
## Residuals 139 253.237 1.822
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coef(fit)
## (Intercept) ln_events ln_fert hdi ln_pop
## -5.3484985 1.3708219 2.1960509 1.9921835 -0.5672164
head(residuals(fit))
## 1 2 3 4 5 6
## -0.4655284 0.1040698 -0.6106572 -0.9839096 0.1853458 -1.9841708
X = vul[,c("ln_events","ln_fert","hdi","ln_pop")]
all(X==vul[,c(3:6)])
## [1] TRUE
cor_mat = cor(X)
cov_mat = cov(X)
propres = eigen(cor_mat)
propres$values
## [1] 1.9963221 1.5984915 0.2715593 0.1336271
propres$values[1] / propres$values
## [1] 1.000000 1.248879 7.351330 14.939503
library(HH,quietly = TRUE)
vif(fit)
## ln_events ln_fert hdi ln_pop
## 2.421759 3.642415 3.767663 2.460624
Les VIF sont tous inférieurs à 5, donc pas de problème de colinéarité.
Calcul des VIFs directement à partir des formules.
R2.1 = summary(lm(ln_events~ln_fert+hdi+ln_pop, data=vul))$r.squared
(VIF1 <- 1/(1 - R2.1))
## [1] 2.421759
R2.2 = summary(lm(ln_fert~ln_events+hdi+ln_pop, data=vul))$r.squared
(VIF2 <- 1/(1 - R2.2))
## [1] 3.642415
R2.3 = summary(lm(hdi~ln_events+ln_fert+ln_pop, data=vul))$r.squared
(VIF3 <- 1/(1 - R2.3))
## [1] 3.767663
R2.4 = summary(lm(ln_pop~ln_events+ln_fert+hdi, data=vul))$r.squared
(VIF4 <- 1/(1 - R2.4))
## [1] 2.460624
yhat = fitted(fit)
#fit$fitted.values
e = residuals(fit)
#fit$residuals
Vérification du calcul des résidus à partir des valeurs ajustées. Utilité de la fonction all.equal par rapport à la comparaison directe avec ==.
with(vul, all(ln_death_risk-yhat==residuals(fit)))
## [1] FALSE
with(vul, all.equal(ln_death_risk-yhat,residuals(fit)))
## [1] TRUE
e_prime = rstandard(fit)
e_star = rstudent(fit)
lattice::histogram(e_prime-e_star)
boxplot(e_prime-e_star)
#plot(fit,which=2)
shapiro.test(e)
##
## Shapiro-Wilk normality test
##
## data: e
## W = 0.94169, p-value = 1.058e-05
qqnorm(e)
qqline(e)
L’asymétrie de la distribution de la racine carré de la valeur absolue des résidus standardisés \(\sqrt{|E|}\) est beaucoup plus faible que celle de la valeur absolue des résidus standardisés \(|E|\) si les \(E\) sont distribuées suivant des lois normales centrées \(N(0,.)\). Utiliser cette racine carrée permet donc d’avoir un indicateur supplémentaire de normalité.
plot(fit,which=1)
if(!("lmtest" %in% rownames(installed.packages()))){install.packages("lmtest")}
plot(fit,which=3)
library(lmtest, quietly = TRUE)
## Warning: package 'lmtest' was built under R version 4.0.2
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
bptest(ln_death_risk~ln_events+ln_fert+hdi+ln_pop+I(ln_events^2)+I(ln_fert^2)+I(hdi^2)+I(ln_pop^2),data = vul)
##
## studentized Breusch-Pagan test
##
## data: ln_death_risk ~ ln_events + ln_fert + hdi + ln_pop + I(ln_events^2) + I(ln_fert^2) + I(hdi^2) + I(ln_pop^2)
## BP = 8.3579, df = 8, p-value = 0.3993
##Pas de normalité -> permutations ###Version 1 avec lmPerm
library(lmPerm)
## Warning: package 'lmPerm' was built under R version 4.0.2
fit_p <- lmp(ln_death_risk~ln_events+ln_fert+hdi+ln_pop, data=vul)
## [1] "Settings: unique SS : numeric variables centered"
summary(fit_p)
##
## Call:
## lmp(formula = ln_death_risk ~ ln_events + ln_fert + hdi + ln_pop,
## data = vul)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4518 -0.7673 -0.1513 0.5669 6.2271
##
## Coefficients:
## Estimate Iter Pr(Prob)
## ln_events 1.3708 5000 <2e-16 ***
## ln_fert 2.1961 5000 <2e-16 ***
## hdi 1.9922 1143 0.0805 .
## ln_pop -0.5672 5000 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.35 on 139 degrees of freedom
## Multiple R-Squared: 0.4221, Adjusted R-squared: 0.4055
## F-statistic: 25.38 on 4 and 139 DF, p-value: 8.522e-16
###Version 2 avec pgirmess
library(pgirmess)
## Warning: package 'pgirmess' was built under R version 4.0.2
fit_p2 <- PermTest(fit)
print(fit_p2)
##
## Monte-Carlo test
##
## Call:
## PermTest.lm(obj = fit)
##
## Based on 1000 replicates
## Simulated p-value:
## p.value
## ln_events 0.000
## ln_fert 0.000
## hdi 0.002
## ln_pop 0.000
n =50
X=rnorm(n,1)
epsilon = rnorm(n,0,0.5)
Yok = 2 +3* X+epsilon
lmok = lm(Yok~X)
Yhs = 2 +3* X+ abs(X)^2*epsilon
lmhs = lm(Yhs~X)
par(mfrow=c(1,2))
plot(lmok,which=1,main="cas ok")
plot(lmhs,which=1,main="cas heteroscedastique")
#plot(lmnl,which=1,main="cas non lineaire")
par(mfrow=c(1,2))
plot(lmok,which=3,main="cas ok")
plot(lmhs,which=3,main="cas heteroscedastique")
#plot(lmnl,which=3,main="cas non lineaire")
pairs(~ln_death_risk+ln_events+ln_fert+hdi+ln_pop, main="Simple Scatterplot Matrix", data=vul, col=(24+36*as.numeric(abs(e_star)>2)))
#Illustrer notion de levier
influences = lm.influence(fit)
hat = influences$hat
pairs(~ln_death_risk+ln_events+ln_fert+hdi+ln_pop, data=vul,
main="Simple Scatterplot Matrix",
col=(24+50*as.numeric(hat>(2*4+2) / nrow(vul))))
any((e^2-hat)/((ncol(X)-1+1)*(1-hat)^2)>4/nrow(vul))
## [1] FALSE
which((e^2-hat)/((ncol(X)-1+1)*(1-hat)^2)>4/nrow(vul))
## integer(0)
par(mfrow=c(1,2))
plot(fit, which=4:5)
dfbetas=apply(abs(influences$coefficients)/influences$sigma,1,`/`,sqrt(c(144,diag(cov_mat))))
colSums(dfbetas>2/nrow(vul))
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## 1 1 1 2 2 2 2 1 2 4 1 2 2 1 3 1 2 0 2 2
## 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
## 2 2 2 1 2 3 1 1 1 0 2 1 1 2 1 2 2 2 3 2
## 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 3 1 1 1 1 2 1 2 2 4 2 1 3 1 4 4 1 2 1 1
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 1 2 2 0 1 0 4 3 2 2 1 1 3 1 2 2 1 1 2 3
## 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
## 1 1 2 3 1 2 2 5 2 2 1 1 3 2 2 2 4 1 2 1
## 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
## 1 2 0 2 1 2 2 1 2 2 1 2 2 2 1 1 2 1 1 2
## 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
## 2 2 3 2 1 1 3 0 4 1 2 2 1 2 1 2 2 0 4 2
## 141 142 143 144
## 0 1 1 0
n =50
X=matrix(rnorm(n*2),ncol=2)
epsilon = rnorm(n,0,0.5)
Ynl = 2 - X[,1] + 3* X[,2]^2 + epsilon
lmnl = lm(Ynl~X[,1]+X[,2])
par(mfrow=c(1,2))
plot(lmnl, which = 1:2)
par(mfrow=c(1,2))
plot(lmnl, which = 3:4)
library(car)
## Warning: package 'car' was built under R version 4.0.2
## Loading required package: carData
## Registered S3 methods overwritten by 'car':
## method from
## influence.merMod lme4
## cooks.distance.influence.merMod lme4
## dfbeta.influence.merMod lme4
## dfbetas.influence.merMod lme4
##
## Attaching package: 'car'
## The following objects are masked from 'package:HH':
##
## logit, vif
## The following objects are masked from 'package:faraway':
##
## logit, vif
crPlots(lmnl)
X2_2 = X[,2]^2
lmnl_2 = lm(Ynl~X[,1]+X2_2)
crPlots(lmnl_2)
n =50
X=matrix(rnorm(n*2),ncol=2)
epsilon = rnorm(n,0,0.5)
ln_Y = 1 - X[,1] + 0.1* X[,2] + epsilon
Y = exp(ln_Y)
lm_ln = lm(Y~X[,1]+X[,2])
par(mfrow=c(1,2))
plot(lm_ln, which = 1:2)
par(mfrow=c(1,2))
plot(lm_ln, which = 3:4)
crPlots(lm_ln)
lm_ln_trans = lm(log(Y)~X[,1]+X[,2])
plot(lm_ln_trans)
crPlots(lm_ln_trans)
?mtcars
mtcars_simple = mtcars[c(1,2,6)]
summary(mtcars_simple)
## mpg cyl wt
## Min. :10.40 Min. :4.000 Min. :1.513
## 1st Qu.:15.43 1st Qu.:4.000 1st Qu.:2.581
## Median :19.20 Median :6.000 Median :3.325
## Mean :20.09 Mean :6.188 Mean :3.217
## 3rd Qu.:22.80 3rd Qu.:8.000 3rd Qu.:3.610
## Max. :33.90 Max. :8.000 Max. :5.424
library("dplyr")
## Warning: package 'dplyr' was built under R version 4.0.2
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following object is masked from 'package:gridExtra':
##
## combine
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
glimpse(mtcars_simple)
## Rows: 32
## Columns: 3
## $ mpg <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8,…
## $ cyl <dbl> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 8,…
## $ wt <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3.150, 3.…
mtcars_simple<- dplyr::mutate(mtcars_simple, cyl = factor(cyl))
glimpse(mtcars_simple)
## Rows: 32
## Columns: 3
## $ mpg <dbl> 21.0, 21.0, 22.8, 21.4, 18.7, 18.1, 14.3, 24.4, 22.8, 19.2, 17.8,…
## $ cyl <fct> 6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 8,…
## $ wt <dbl> 2.620, 2.875, 2.320, 3.215, 3.440, 3.460, 3.570, 3.190, 3.150, 3.…
fit_simple = lm(mpg~wt+factor(cyl),data=mtcars_simple)
summary(fit_simple)
##
## Call:
## lm(formula = mpg ~ wt + factor(cyl), data = mtcars_simple)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.5890 -1.2357 -0.5159 1.3845 5.7915
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.9908 1.8878 18.006 < 2e-16 ***
## wt -3.2056 0.7539 -4.252 0.000213 ***
## factor(cyl)6 -4.2556 1.3861 -3.070 0.004718 **
## factor(cyl)8 -6.0709 1.6523 -3.674 0.000999 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.557 on 28 degrees of freedom
## Multiple R-squared: 0.8374, Adjusted R-squared: 0.82
## F-statistic: 48.08 on 3 and 28 DF, p-value: 3.594e-11
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.2
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:latticeExtra':
##
## layer
ggplot(mtcars_simple, aes(x=wt, y=mpg, color=cyl, shape=cyl)) +
geom_point()
fit_croise = lm(mpg ~ wt * cyl, data = mtcars_simple)
summary(fit_croise)
##
## Call:
## lm(formula = mpg ~ wt * cyl, data = mtcars_simple)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.1513 -1.3798 -0.6389 1.4938 5.2523
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.571 3.194 12.389 2.06e-12 ***
## wt -5.647 1.359 -4.154 0.000313 ***
## cyl6 -11.162 9.355 -1.193 0.243584
## cyl8 -15.703 4.839 -3.245 0.003223 **
## wt:cyl6 2.867 3.117 0.920 0.366199
## wt:cyl8 3.455 1.627 2.123 0.043440 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.449 on 26 degrees of freedom
## Multiple R-squared: 0.8616, Adjusted R-squared: 0.8349
## F-statistic: 32.36 on 5 and 26 DF, p-value: 2.258e-10
ggplot(mtcars_simple, aes(x=wt, y=mpg, color=cyl, shape=cyl)) +
geom_point() +
geom_smooth(method=lm, se=FALSE, fullrange=TRUE)
## `geom_smooth()` using formula 'y ~ x'
ggplot(mtcars_simple, aes(x=wt, y=mpg)) +
geom_point() +
geom_smooth(method=lm, se=FALSE, fullrange=TRUE, formula = y ~ x)
summary(fit_simple)
##
## Call:
## lm(formula = mpg ~ wt + factor(cyl), data = mtcars_simple)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.5890 -1.2357 -0.5159 1.3845 5.7915
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.9908 1.8878 18.006 < 2e-16 ***
## wt -3.2056 0.7539 -4.252 0.000213 ***
## factor(cyl)6 -4.2556 1.3861 -3.070 0.004718 **
## factor(cyl)8 -6.0709 1.6523 -3.674 0.000999 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.557 on 28 degrees of freedom
## Multiple R-squared: 0.8374, Adjusted R-squared: 0.82
## F-statistic: 48.08 on 3 and 28 DF, p-value: 3.594e-11
summary(fit_croise)
##
## Call:
## lm(formula = mpg ~ wt * cyl, data = mtcars_simple)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.1513 -1.3798 -0.6389 1.4938 5.2523
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.571 3.194 12.389 2.06e-12 ***
## wt -5.647 1.359 -4.154 0.000313 ***
## cyl6 -11.162 9.355 -1.193 0.243584
## cyl8 -15.703 4.839 -3.245 0.003223 **
## wt:cyl6 2.867 3.117 0.920 0.366199
## wt:cyl8 3.455 1.627 2.123 0.043440 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.449 on 26 degrees of freedom
## Multiple R-squared: 0.8616, Adjusted R-squared: 0.8349
## F-statistic: 32.36 on 5 and 26 DF, p-value: 2.258e-10
anova(fit_simple)
## Analysis of Variance Table
##
## Response: mpg
## Df Sum Sq Mean Sq F value Pr(>F)
## wt 1 847.73 847.73 129.6650 5.079e-12 ***
## factor(cyl) 2 95.26 47.63 7.2856 0.002835 **
## Residuals 28 183.06 6.54
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(fit_croise)
## Analysis of Variance Table
##
## Response: mpg
## Df Sum Sq Mean Sq F value Pr(>F)
## wt 1 847.73 847.73 141.3883 5.133e-12 ***
## cyl 2 95.26 47.63 7.9443 0.00203 **
## wt:cyl 2 27.17 13.58 2.2658 0.12386
## Residuals 26 155.89 6.00
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(fit_simple,fit_croise)
## Analysis of Variance Table
##
## Model 1: mpg ~ wt + factor(cyl)
## Model 2: mpg ~ wt * cyl
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 28 183.06
## 2 26 155.89 2 27.17 2.2658 0.1239